import scrapy
from scrapy.loader import ItemLoader
from poetry.items import BookCateItem, BookItem, BookContent, ArticleItemLoader2
from scrapy import Request


def handle_addr(s):
    """Remove carriage returns, tabs and newlines from *s*.

    Returns the cleaned string (other surrounding whitespace is kept as is),
    or an empty string when nothing but whitespace remains.
    """
    # One pass over the string instead of three chained replace() calls.
    cleaned = s.translate(str.maketrans('', '', '\r\t\n'))
    return cleaned if cleaned.strip() else ''


def data_empty(lst):
    """Return True when no entry of *lst* has content after cleaning.

    Each entry is passed through ``handle_addr``; the list counts as empty
    when every cleaned entry is the empty string (an empty *lst* included).
    """
    cleaned = [handle_addr(entry) for entry in lst]
    return not any(cleaned)


class BookSpider(scrapy.Spider):
    """Spider for the book section of www.shicimingju.com.

    The crawl runs in three stages: the index page (``parse``), each book's
    table of contents (``parse_book``) and each chapter page
    (``parse_detail``).
    """

    name = 'book'
    allowed_domains = ['www.shicimingju.com']
    start_urls = ['https://www.shicimingju.com/book/']

    def parse(self, response):
        """Parse the index page: category names, descriptions and book links.

        Yields one ``BookCateItem`` carrying every category name and
        description found on the page, followed by a ``Request`` (handled by
        ``parse_book``) for each book link whose href ends in ``html``.
        """
        # NOTE(review): "booknark_card" is kept exactly as in the original
        # selector; presumably it mirrors the site's own markup - confirm.
        card = '//*[@id="main_left"]/div[contains(@class,"booknark_card")]'
        cate = response.xpath(card + '/h2/a/text()').extract()
        description = response.xpath(card + '/div[@class="des"]/text()').extract()
        urls = response.xpath(card + '/ul/li/a/@href').extract()

        item_loader = ItemLoader(item=BookCateItem(), response=response)
        item_loader.add_value('cate', cate)
        item_loader.add_value('description', description)
        yield item_loader.load_item()

        for post_url in urls:
            if post_url.endswith('html'):
                # urljoin resolves relative and absolute hrefs alike, unlike
                # concatenating a hard-coded origin.
                yield Request(response.urljoin(post_url), callback=self.parse_book)

    def parse_book(self, response):
        """Parse a book page: its title and chapter list.

        Yields one ``BookItem`` with the book name and all chapter titles,
        followed by a ``Request`` (handled by ``parse_detail``) for each
        chapter link whose href ends in ``html``. The book name and the
        chapter title are forwarded to the callback through ``meta``.
        """
        chapters = response.xpath('//*[@class="book-mulu"]/ul/li/a/text()').extract()
        # Default to '' so a page without the expected <h1> does not raise
        # AttributeError on None.strip().
        book_name = response.xpath(
            '//*[@id="main_left"]/div[contains(@class,"bookmark-list")]/h1/text()'
        ).extract_first(default='').strip()
        if not book_name:
            self.logger.warning('No book title found on %s', response.url)

        item_loader = ItemLoader(item=BookItem(), response=response)
        item_loader.add_value('book_name', book_name)
        item_loader.add_value('chapters', chapters)
        yield item_loader.load_item()

        # Read href and title from the same <a> element so they cannot get
        # out of step (indexing two independently extracted lists could
        # mispair them or raise IndexError when a link has no text node).
        for link in response.xpath('//*[@class="book-mulu"]/ul/li/a'):
            post_url = link.xpath('@href').extract_first(default='')
            if not post_url.endswith('html'):
                continue
            chapter = link.xpath('text()').extract_first(default='')
            yield Request(response.urljoin(post_url), callback=self.parse_detail,
                          meta={"book_name": book_name, "chapter": chapter})

    def parse_detail(self, response):
        """Parse a chapter page and yield its text as a ``BookContent`` item.

        The book name and chapter title come from ``response.meta`` (empty
        string when absent). The text nodes directly under the
        ``chapter_content`` div are used; when those are all blank, the text
        of its ``<p>`` children is used instead.
        """
        book_name = response.meta.get("book_name", "")
        chapter = response.meta.get("chapter", "")
        content = response.xpath('//*[@id="main_left"]//div[@class="chapter_content"]/text()').extract()
        if data_empty(content):
            # Fall back to paragraph text when the div holds no direct text.
            content = response.xpath('//*[@id="main_left"]//div[@class="chapter_content"]/p/text()').extract()

        item_loader = ItemLoader(item=BookContent(), response=response)
        item_loader.add_value('book_name', book_name)
        item_loader.add_value('chapter', chapter)
        item_loader.add_value('content', content)
        yield item_loader.load_item()
